/*******************************************************************************
File    : 2005 Q1 Compilation.do
Project : Bank Expansion and Moneylender Interest Rates - RDD Evidence from India
Purpose : Process 2005 Q1 District level branch statistics from RBI
Author  : Kannan Narayanaswamy
Updated : 09 Feb 2026
Source  : Quarterly Statistics on Credit and Deposits - District Level - RBI
------------------------------------------------------------------------------*/

cls

*==========================*
* 0. Setup                 *
*==========================*

cd "`c(pwd)'\Data\MOF_Data\RBI_2005_Q1"

import excel "2005 Q1 VBA Compiled.xlsx", sheet("All Data") firstrow clear

* Drop unwanted columns (deposits and credits)
drop D E G H J K L N O Q R T

* Rename columns
rename A region_state_dist
rename B District
rename STATEBANKOFINDIA     sbi_2005
rename NATIONALISEDBANKS    onb_2005
rename FOREIGNBANKS         fb_2005
rename REGIONALRURALBANKS   rrb_2005
rename OTHERSCHEDULED       scb_2005
rename ALLSCHEDULED         all_2005

* Fix some “state” rows that sit in the District column
replace District = "N.C.T. Delhi"        if region_state_dist == "Delhi"
replace District = "Chandigarh"          if region_state_dist == "Chandigarh"
replace District = "Lakshadweep"         if region_state_dist == "Lakshadweep"
replace District = "Dadra & Nagar Haveli" if region_state_dist == "Dadra & Nagar Haveli"

* Drop header/blank rows
drop if inlist(region_state_dist,"REGION/STATE/District","")

* Remove “(Contd.)” and region rows
replace region_state_dist = subinstr(region_state_dist, " (Contd.)", "", .)
drop if strpos(region_state_dist, "REGION")

*==========================*
* 1. Build State & District*
*==========================*

* Remove rows where region_state_dist is just a numeric code
replace region_state_dist = "" if regexm(region_state_dist, "^[0-9]+$")

* Generate State names
gen State_UT = strtrim(proper(strlower(region_state_dist)))
replace State_UT = subinstr(State_UT,"  "," ",.)
replace State_UT = State_UT[_n-1] if State_UT == ""
replace State_UT = "Goa" if strpos(District, " Goa")

* Clean district names
replace District = strtrim(District)
replace District = subinstr(District,"  "," ",.)

* Drop pure state rows (no district name)
drop if District == ""

*==========================*
* 2. Harmonise names       *
*==========================*

* Correct obvious spelling mismatches 
* These corrections help mergining with Census_UB perfect
replace District = "Udupi"      if District == "Udipi"
replace District = "Khordha"    if District == "Khurda"
replace District = "Haora"      if District == "Howrah"
replace District = "Dohad"      if District == "Dahod"
replace District = "Palamu"     if District == "Palamau"
replace District = "Nuapada"    if District == "Nawapara"


*==========================*
* 3. Map carved-out dists  *
*==========================*

gen Parent_District = District

replace Parent_District = "Lohit"              if District == "Anjaw"
replace Parent_District = "Lower Subansiri"    if District == "Kurung Kumey"
replace Parent_District = "Dibang Valley"      if District == "Lower Dibang Valley"
replace Parent_District = "Kamrup"             if District == "Kamrup Metropolitan"
replace Parent_District = "Jehanabad"          if District == "Arwal"
replace Parent_District = "Palamu"             if District == "Latehar"
replace Parent_District = "Dumka"              if District == "Jamtara"
replace Parent_District = "Paschimi Singhbhum" if District == "Seraikela-Kharsawan"
replace Parent_District = "Shahdol"            if District == "Anuppur"
replace Parent_District = "Guna"               if District == "Ashoknagar"
replace Parent_District = "East Nimar"         if District == "Khandwa"
replace Parent_District = "East Nimar"         if District == "Burhanpur"
replace Parent_District = "Tuensang"           if inlist(District,"Kiphire","Longleng")
replace Parent_District = "Kohima"             if District == "Peren"
replace Parent_District = "Dharmapuri"         if District == "Krishnagiri"
replace Parent_District = "Medinpur_old"       if District == "Paschim Medinipur"
replace Parent_District = "Medinpur_old"       if District == "Purba Medinipur"

* Convert branch counts to numeric
foreach var in sbi_2005 onb_2005 fb_2005 rrb_2005 scb_2005 all_2005 {
    replace `var' = "" if `var' == "–"
    destring `var', replace
}

* Aggregate branches at parent district level
collapse (sum) sbi_2005 onb_2005 fb_2005 rrb_2005 scb_2005 all_2005, ///
    by(State_UT Parent_District)

rename Parent_District District

*==========================*
* 4. Final clean + save    *
*==========================*

gen ID_RBI = _n
order ID_RBI, before(State_UT)

* Recompute all_2005 just to be safe
replace all_2005 = sbi_2005 + onb_2005 + fb_2005 + rrb_2005 + scb_2005

save "2005_Q1.dta", replace
export excel "2005_Q1.xlsx", firstrow(variables) replace


*==============================================================*
* 5. Fuzzy Match RBI Districts to Census Districts (Census 2001)
*==============================================================*

* Path to Census data
*local census_path "C:\Users\akank\OneDrive\Desktop\ECON 551 - Devolopment Economics\01_Informal Credit Market\03_Analysis\Data\MOF_Data\Census 2001"
local census_path "`c(pwd)'/../Census 2001"

* Fuzzy merge using reclink
reclink State_UT District using "`census_path'/Census_UB.dta", ///
    idmaster(ID_RBI) idusing(ID_Census) gen(match_score)

preserve

    * Drop merge flag created by reclink
    drop _merge

    *----------------------------------------------------------*
    * Bring USING file back to check which Census rows did not 
    * get matched (i.e., districts in Census with no RBI match)
    *----------------------------------------------------------*
    merge 1:1 ID_Census using "`census_path'/Census_UB.dta", ///
        keepusing(State_UT District)

    * Optional: inspect unmatched Census districts
    tab District if _merge == 2

restore

* Keep only variables needed for the main dataset
keep ID_RBI State_UT District sbi_2005 onb_2005 fb_2005 rrb_2005 scb_2005 all_2005 pop_total Underbanked

save MOF_Census_2005_Q1, replace


*============================================*
* 6. Construct Underbanked Dummy (2005 data)
*============================================*

* Ratio: population per bank branch (higher = more underbanked)
gen ratio_banks = pop_total / all_2005

* Compute national totals as locals
summarize all_2005
local banks_total = r(sum)

summarize pop_total
local pop_total_all = r(sum)

* Compute national benchmark (population per branch)
local mean_2005 = `pop_total_all' / `banks_total'

* Demeaned ratio
gen B_Ratio_Dist_Ex = ratio_banks - `mean_2005'

* Underbanked indicator = 1 if district’s ratio above national mean
gen above_thresh = ratio_banks > `mean_2005'

save MOF_Census_2005_Q1, replace

cd ..\..\..

